Thank Kieran for providing this code kindly.
secretory <- readRDS("../rds/20180910Fresh_secretory_5_7clusters_clincluster.rds")
sce <- readRDS("../rds/20181011_culture_11553n15072_sceset.rds")
m <- model.matrix(~ sce$total_features + sce$Patient2)
# sce<- scater::normalise(sce)
sce<- scater::normaliseExprs(sce, design = m)
## Warning: 'normalizeExprs' is deprecated.
## Use edgeR::calcNormFactors(), normalize(), limma::removeBatchEffect() directly instead.
exprs(sce) <- norm_exprs(sce)
gene_vars <- rowVars(exprs(sce))
var_cutoff <- sort(gene_vars, decreasing = TRUE)[7500]
sce_hvg <- sce[gene_vars >= var_cutoff, ]
is_na_mgi_symbol <- is.na(rowData(sce_hvg)$mgi_symbol)
rowData(sce_hvg)$mgi_symbol <- rowData(sce_hvg)$Geneid
sce_hvg$time <- NA
sce_hvg$time[sce$source == "Fresh"] <- 0
sce_hvg$time[sce$source == "O.N."] <- 0.5
sce_hvg$time[sce$source == "Long" & sce$Patient2 == 11553] <- 2
sce_hvg$time[sce$source == "Long" & sce$Patient2 == 15072] <- 6
sce$time <- sce_hvg$time
And perform inference:
pc1 <- prcomp(t(exprs(sce_hvg)), scale = TRUE)$x[,1]
pc1 <- scale_vec(pc1)
time_numeric <- as.numeric( sce$time)
pc1 <- pc1 * sign(cor(pc1, time_numeric))
x <- sce_hvg$time
# assay(sce, "norm_exprs") <- normalise(sce)
pcavi <- phenopath(sce_hvg, x, z_init = pc1, sce_assay = "norm_exprs")
## Iteration ELBO Change (%)
## [ 1 ] -12259050.0368339 Inf
## [ 2 ] -12171446.8072936 0.719743765283722
## [ 3 ] -12170149.6926922 0.010658164723716
## [ 4 ] -12169117.1019817 0.00848533794052712
## [ 5 ] -12168202.6395145 0.00751518111869619
## [ 6 ] -12167389.3429862 0.00668423196911412
## [ 7 ] -12166664.2801838 0.00595942146283749
## [ 8 ] -12166014.7024713 0.00533928100860801
## [ 9 ] -12165428.5520254 0.00481816520733624
## [ 10 ] -12164895.1311443 0.00438491968425485
## [ 11 ] -12164405.4242848 0.00402573609110709
## [ 12 ] -12163952.1085442 0.00372671428343931
## [ 13 ] -12163529.3773523 0.00347539911134304
## [ 14 ] -12163132.6869771 0.00326141616134619
## [ 15 ] -12162758.4967358 0.00307652446980571
## [ 16 ] -12162404.0390067 0.00291437225689607
## [ 17 ] -12162067.1317197 0.00277014822631597
## [ 18 ] -12161746.0330607 0.00264023486561942
## [ 19 ] -12161439.3324245 0.00252191067072253
## [ 20 ] -12161145.8700931 0.00241311414643824
## [ 21 ] -12160864.6785853 0.00231226574140791
## [ 22 ] -12160594.939845 0.00221813769470955
## [ 23 ] -12160335.9537883 0.00212976070484431
## [ 24 ] -12160087.1149079 0.00204635771154652
## [ 25 ] -12159847.8945814 0.00196729702986733
## [ 26 ] -12159617.8274345 0.00189205902834269
## [ 27 ] -12159396.5006183 0.00182021218067817
## [ 28 ] -12159183.5452163 0.00175139557045801
## [ 29 ] -12158978.6292401 0.00168530583425554
## [ 30 ] -12158781.4518416 0.001621687167301
## [ 31 ] -12158591.7384759 0.00156032351246122
## [ 32 ] -12158409.2368297 0.00150103227040926
## [ 33 ] -12158233.7133744 0.00144365916455387
## [ 34 ] -12158064.9504408 0.00138807396014201
## [ 35 ] -12157902.7437322 0.00133416685424643
## [ 36 ] -12157746.9002128 0.0012818454000921
## [ 37 ] -12157597.2363143 0.00123103188544425
## [ 38 ] -12157453.5764184 0.00118166106971106
## [ 39 ] -12157315.7515744 0.00113367824607489
## [ 40 ] -12157183.5984215 0.00108703756789537
## [ 41 ] -12157056.958284 0.00104170061779055
## [ 42 ] -12156935.6764191 0.000997635162032823
## [ 43 ] -12156819.6013918 0.000954814096869996
## [ 44 ] -12156708.5845637 0.000913214521302936
## [ 45 ] -12156602.4796753 0.000872816961197061
## [ 46 ] -12156501.142513 0.000833604678941544
## [ 47 ] -12156404.4306463 0.000795563089786837
## [ 48 ] -12156312.2032267 0.000758679261585724
## [ 49 ] -12156224.3208424 0.00072294144954577
## [ 50 ] -12156140.6454164 0.000688338744545721
## [ 51 ] -12156061.0401489 0.000654860709333412
## [ 52 ] -12155985.3694921 0.000622497103050494
## [ 53 ] -12155913.4991578 0.000591237625796828
## [ 54 ] -12155845.2961506 0.000561071693979495
## [ 55 ] -12155780.6288252 0.000531988256134128
## [ 56 ] -12155719.3669619 0.000503975630695985
## [ 57 ] -12155661.3818595 0.000477021369630234
## [ 58 ] -12155606.546441 0.000451112153761454
## [ 59 ] -12155554.73537 0.000426233702044099
## [ 60 ] -12155505.8251747 0.000402370712136175
## [ 61 ] -12155459.6943769 0.000379506814210675
## [ 62 ] -12155416.223624 0.00035762455222534
## [ 63 ] -12155375.2958214 0.000336705380376969
## [ 64 ] -12155336.7962628 0.000316729674050692
## [ 65 ] -12155300.6127566 0.000297676769637783
## [ 66 ] -12155266.6357467 0.000279525006543378
## [ 67 ] -12155234.7584271 0.000262251781114199
## [ 68 ] -12155204.8768453 0.000245833633234485
## [ 69 ] -12155176.8899984 0.000230246315220467
## [ 70 ] -12155150.6999165 0.000215464888064893
## [ 71 ] -12155126.2117348 0.000201463821524594
## [ 72 ] -12155103.3337529 0.000188217090327371
## [ 73 ] -12155081.9774823 0.000175698285269684
## [ 74 ] -12155062.0576803 0.000163880710288516
## [ 75 ] -12155043.4923701 0.000152737505089063
## [ 76 ] -12155026.2028501 0.000142241733947475
## [ 77 ] -12155010.1136879 0.000132366505570438
## [ 78 ] -12154995.1527062 0.000123085049260748
## [ 79 ] -12154981.2509529 0.000114370832406825
## [ 80 ] -12154968.3426643 0.000106197632684426
## [ 81 ] -12154956.3652154 9.85396293624684e-05
## [ 82 ] -12154945.259063 9.13714714951217e-05
## [ 83 ] -12154934.9676805 8.46683467986703e-05
## [ 84 ] -12154925.4374839 7.84060471984323e-05
## [ 85 ] -12154916.6177526 7.25610187252703e-05
## [ 86 ] -12154908.460545 6.71103993937042e-05
## [ 87 ] -12154900.9206081 6.20320725382023e-05
## [ 88 ] -12154893.9552847 5.73046824635015e-05
## [ 89 ] -12154887.5244173 5.29076666722531e-05
## [ 90 ] -12154881.5902498 4.88212691333772e-05
## [ 91 ] -12154876.1173279 4.50265548959752e-05
## [ 92 ] -12154871.0723984 4.15054136670513e-05
## [ 93 ] -12154866.4243092 3.82405616244464e-05
## [ 94 ] -12154862.14391 3.52155306615942e-05
## [ 95 ] -12154858.2039528 3.24146697363987e-05
## [ 96 ] -12154854.5789952 2.98231263273443e-05
## [ 97 ] -12154851.2453048 2.7426830094624e-05
## [ 98 ] -12154848.1807665 2.52124767083394e-05
## [ 99 ] -12154845.3647919 2.31675067715933e-05
## [ 100 ] -12154842.7782316 2.12800804362642e-05
## [ 101 ] -12154840.4032904 1.95390567069628e-05
## [ 102 ] -12154838.2234458 1.79339661086352e-05
## [ 103 ] -12154836.2233698 1.6454981192614e-05
## [ 104 ] -12154834.3888541 1.50928893559169e-05
## [ 105 ] -12154832.7067376 1.38390759118821e-05
## [ 106 ] -12154831.1648391 1.26854783352119e-05
## [ 107 ] -12154829.7518925 1.1624569270271e-05
## [ 108 ] -12154828.457484 1.06493360450285e-05
## [ 109 ] -12154827.2719951 9.75323508339251e-06
Save for decision analysis:
dec_df <- data_frame(gene = rownames(sce_hvg),
m = pcavi$m_beta[1,],
s = pcavi$s_beta[1,])
# write_csv(dec_df, "../tables/Phenopath/dec_df_new.csv")
Plot the elbo:
plot_elbo(pcavi) +
xlab(expression("CAVI iterations")) +
theme_cowplot(font_size = 11)
ggsave("../plots/20181012phenopath/elbo.png", width = 5, height = 3)
xs <- sce_hvg$time
qplot(scale_vec(pc1), pcavi$m_z, color = factor(xs)) +
scale_color_brewer(palette = "Set1", name = "Source") +
#stat_function(fun = function(x) x, color = 'black') +
xlab("PC1") + ylab(expression(z))
qplot(scale_vec(pc1), pcavi$m_z, color = factor(sce$Sample.Name)) +
scale_color_brewer(palette = "Set1", name = "Source") +
#stat_function(fun = function(x) x, color = 'black') +
xlab("PC1") + ylab(expression(z))
df_beta <- data_frame(
lambda = pcavi$m_lambda,
beta = pcavi$m_beta[1,],
chi = pcavi$chi_exp[1,],
alpha = pcavi$m_alpha[1,],
pos_sd = sqrt(pcavi$s_beta[1,]),
gene = rowData(sce_hvg)$Geneid, #stringr::str_to_title(rownames(sce_hvg)),
# ensembl_gene_id = rowData(sce_hvg)$ensembl_gene_id,
is_sig = significant_interactions(pcavi, n = 3) #[,1]
)
# write_csv(df_beta, "../tables/Phenopath/df_gene_shalek.csv")
df_beta$is_sig_graph <-
plyr::mapvalues(df_beta$is_sig, from = c(FALSE, TRUE),
to = c("Non-significant", "Significant"))
This gives:
sum(df_beta$is_sig)
## [1] 1055
significant interactions.
We can plot these:
chi_cutoff <- 0.15
df_beta <- dplyr::mutate(df_beta,
gene_symbol = gene)
ggplot(df_beta, aes(x = beta, y = 1 / chi, color = is_sig_graph)) +
geom_point(alpha = 0.8) +
theme(legend.title = element_blank()) +
# geom_point(data = dplyr::filter(df_beta, is_mmr), alpha = 0.6) +
scale_color_brewer(palette = "Set1", name = "'Significant'") +
geom_label_repel(data = dplyr::filter(df_beta, 1 / chi > chi_cutoff), aes(label = gene_symbol)) +
xlab(expression(beta)) +
ylab(expression(paste("[", chi, "]"^-1)))
Which genes are sig?
dplyr::filter(df_beta, is_sig) %>% dplyr::arrange(desc(abs(beta))) %>%
dplyr::select(gene, beta, alpha) %>%
datatable()
We can plot expression along phenotime for the 35 significant genes:
tmap <- pcavi$m_z
top_genes <- dplyr::filter(df_beta, is_sig) %>%
dplyr::arrange(desc(abs(beta))) %>%
extract2("gene") %>% head(n=20)
Check z loosely corresponds to time:
zdf <- data_frame(z = pcavi$m_z, time = as.factor(sce_hvg$time), stimulant = xs, patient = sce_hvg$Patient)
ggplot(zdf, aes(x = time, y = z, fill = time)) +
geom_violin(alpha = 0.8) +
theme(legend.position = "none") +
scale_fill_brewer(palette = "Set3") +
# scale_fill_brewer(palette = "Set1") +
xlab("Capture time") +
ylab("Pseudotime)") +
facet_grid(~patient, scales = "free")
theme(axis.text = element_text(size = 8),
axis.title = element_text(size = 9),
legend.title = element_text(size = 11),
legend.text = element_text(size = 10))
## List of 4
## $ axis.title :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : num 9
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : num 8
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ legend.text :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : num 10
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ legend.title:List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : num 11
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
# ggsave(filename = "../plots/20181012phenopath/violin_pseudotimeVSpatient.png",width = 8, height = 2 )
time_plot <- last_plot()
zdf <- data_frame(z = pcavi$m_z, time = as.factor(sce_hvg$time), stimulant = xs, patient = sce_hvg$Patient)
ggplot(zdf, aes(x = time, y = z, fill = time)) +
geom_violin(alpha = 0.8) +
theme(legend.position = "none") +
scale_fill_brewer(palette = "Set3") +
# scale_fill_brewer(palette = "Set1") +
xlab("Capture time (day)") +
ylab("Pseudotime") +
# facet_grid(~patient, scales = "free")
theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 15),
legend.title = element_text(size = 15),
legend.text = element_text(size = 15))
# ggsave(filename = "../plots/20181012phenopath/violin_pseudotime.png",width = 3, height = 3)
# ggsave(filename = "../manuscript_plots/PhenoPath/violin_pseudotime.png",width = 3, height = 3)
# time_plot <- last_plot()
select <- dplyr::select
sce_hvg <- runPCA(sce_hvg, ncomponents = 3, return_SCE = TRUE)
tmap <- pcavi$m_z
sce_hvg$tmap <- tmap
df_pca <- as_data_frame(reducedDim(sce_hvg)[,c(1,2)]) %>%
mutate(pseudotime = sce_hvg$tmap, Stimulant = sce_hvg$source)
df_no_stim <- select(df_pca, -Stimulant)
df_pca$Stimulant <- factor(df_pca$Stimulant, levels = c("Fresh","O.N.","Long"))
plt <- ggplot(df_pca, aes(x = PC1, y = PC2)) +
geom_point(data = df_no_stim, fill = "grey80", color = "grey80", size = 3) +
geom_point(aes(fill = pseudotime), shape = 21, color = 'grey20', size = 3) +
scale_fill_viridis(name = "Pseudotime", option = "C") +
facet_wrap(~ Stimulant) +
theme(legend.position = c(.4,.08),
legend.direction = "horizontal") +
theme(strip.background = element_rect(fill = "grey95", color = "grey30", size = 1),
legend.text = element_text(size = 8),
axis.text = element_text(size = 9),
axis.title = element_text(size = 10),
legend.title = element_text(size = 10)) +
xlim(-18, 20) + ylim(-15, 10)
plt
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
# ggsave("../plots/20181012phenopath/PC1PC2.png", width = 15,height = 5)
tiff
## function (filename = "Rplot%03d.tiff", width = 480, height = 480,
## units = "px", pointsize = 12, compression = c("none", "rle",
## "lzw", "jpeg", "zip", "lzw+p", "zip+p"), bg = "white",
## res = NA, ..., type = c("cairo", "Xlib", "quartz"), antialias)
## {
## if (!checkIntFormat(filename))
## stop("invalid 'filename'")
## g <- .geometry(width, height, units, res)
## new <- list(...)
## type <- if (!missing(type))
## match.arg(type)
## else getOption("bitmapType")
## if (!missing(antialias))
## new$antialias <- match.arg(antialias, aa.cairo)
## d <- check.options(new, name.opt = ".X11.Options", envir = .X11env)
## antialias <- match(d$antialias, aa.cairo)
## comp <- switch(match.arg(compression), none = 1L, rle = 2L,
## lzw = 5L, jpeg = 7L, zip = 8L, `lzw+p` = 15L, `zip+p` = 18L)
## if (type == "quartz" && capabilities("aqua")) {
## width <- g$width/ifelse(is.na(res), 72, res)
## height <- g$height/ifelse(is.na(res), 72, res)
## invisible(.External(C_Quartz, "tiff", path.expand(filename),
## width, height, pointsize, d$family, d$antialias !=
## "none", "", bg, "white", if (is.na(res)) NULL else res))
## }
## else if (type == "cairo" && capabilities("cairo"))
## invisible(.External(C_devCairo, filename, 8L, g$width,
## g$height, pointsize, bg, res, antialias, comp, d$family,
## 300))
## else invisible(.External2(C_X11, paste0("tiff::", comp, ":",
## filename), g$width, g$height, pointsize, d$gamma, d$colortype,
## d$maxcubesize, bg, bg, d$fonts, res, 0L, 0L, "", 0, 0,
## d$family))
## }
## <bytecode: 0x7fb0be5a58f8>
## <environment: namespace:grDevices>
library(grid)
genes <- c("FOS", "JUN", "CCND1")
mm <- match(genes, rowData(sce_hvg)$mgi_symbol)
gexp <- t(exprs(sce_hvg)[mm, ])
colnames(gexp) <- genes
gexp_df <- as_data_frame(gexp) %>%
mutate(pseudotime = sce_hvg$tmap)
df_bar <- inner_join(df_pca, gexp_df)
## Joining, by = "pseudotime"
classify_cell <- function(row) {
if(row['pseudotime'] < 0.2) return("beginning")
if(row['pseudotime'] > 0.2 && row['Stimulant'] == "O.N.") return("Overnight-end")
if(row['pseudotime'] > 0.2 && row['Stimulant'] == "Long") return("Long-end")
return(NA)
}
df_bar$cell_class <- sapply(seq_len(nrow(df_bar)), function(i) classify_cell(df_bar[i, ]))
df_group <- filter(df_bar, !is.na(cell_class)) %>%
gather(gene, expression, -(PC1:Stimulant), -cell_class) %>%
group_by(cell_class, gene) %>%
summarise(mean_expression = mean(expression))
df_group$gene <- factor(df_group$gene, levels = genes)
plts <- list()
for(cl in unique(df_group$cell_class)) {
plts[[cl]] <- filter(df_group, cell_class == cl) %>%
ggplot(aes(x = gene, y = mean_expression, fill = gene)) +
geom_bar(stat = "identity", color = 'grey20') +
scale_fill_brewer(palette = "Dark2") +
theme(legend.position = "None",
axis.title.x = element_blank(),
axis.title.y = element_text(size = 8, face = "bold"),
axis.text.y = element_text(size = 8),
axis.text.x = element_text(size = 8, angle = -90, face = "bold")) +
labs(y = "Mean z-score\n expression") +
ylim(-6, 6)
}
width = 0.12
height = 0.3
pca_plot <- ggdraw() +
draw_plot(plt) +
draw_plot(plts[["beginning"]], x = 0.41, y = 0.2, width = width, height = height) +
draw_plot(plts[["beginning"]], x = 0.88, y = 0.15, width = width, height = height) +
draw_plot(plts[["Overnight-end"]], x = 0.06, y = 0.105, width = width, height = height) +
draw_plot(plts[["Long-end"]], x = 0.56, y = 0.61, width = width, height = height)
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
pca_plot
make_gene_plot <- function(genes) {
inds <- match(genes, rowData(sce_hvg)$Geneid)
df <- t(exprs(sce_hvg))[, inds, drop=FALSE]
colnames(df) <- genes #rownames(sce_hvg)[inds]
df <- as_data_frame(df) %>%
dplyr::mutate(phenopath = pcavi$m_z, x = factor(sce_hvg$source,levels = c("Fresh","O.N.","Long"))) %>%
gather(gene, expression, -phenopath, -x)
df$gene <- factor(df$gene, levels = genes)
df_nox <- dplyr::select(df, -x)
ggplot(df, aes(x = phenopath, y = expression, fill = x, color = x)) +
geom_point(data = df_nox, color = "grey80", fill = "grey80") +
geom_point(shape = 21, color = 'grey30', alpha = 0.3) +
facet_grid(gene ~ x, scales = "free") +
scale_fill_brewer(palette = "Set1", name = "MSI") +
scale_color_brewer(palette = "Set1", name = "MSI") +
theme(legend.position = "none",
strip.text = element_text(face = "bold"),
strip.text.x = element_text(size = 10, margin = margin(.1, 0, .1, 0, "cm")),
strip.text.y = element_text(size = 8, margin = margin(0, .1, 0, .1, "cm")),
strip.background = element_rect(colour = "black", fill = "grey95",
size = 0.3, linetype = 1)) +
theme(axis.text = element_text(size = 9),
axis.text.y = element_text(size = 8),
axis.title = element_text(size = 10)) +
stat_smooth(se = FALSE, method = "lm", size = 1.5, formula = y ~ ns(x,3),
color = 'grey30') +
ylab(expression("Normalised expression")) +
xlab("Pseudotime")
}
genes <- c("ESR1","LGR5")
make_gene_plot(genes)
# ggsave("../manuscript_plots/PhenoPath/ESR1_LGR5.png", height = 3, width = 8)
# gene_plot <- plot_over_pseudotime <- last_plot()
Now do the same for original time series:
inds <- match(genes, rowData(sce_hvg)$mgi_symbol)
df <- t(exprs(sce_hvg))[, inds, drop=FALSE]
colnames(df) <- genes
df <- as_data_frame(df) %>%
dplyr::mutate(capture_time = sce_hvg$time, x = as.character(xs)) %>%
gather(gene, expression, -capture_time, -x)
df$gene <- factor(df$gene, levels = genes)
df_g <- group_by(df, capture_time, x, gene) %>%
summarise(median_expression = median(expression))
ggplot(df, aes(x = capture_time, y = expression, fill = x, color = x)) +
# geom_beeswarm(shape = 21, color = 'grey30', alpha = 0.3) +
geom_jitter(shape = 21, color = 'grey30', alpha = 0.3, width= 0.1) +
facet_grid(gene ~ x, scales = "free") +
scale_fill_brewer(palette = "Set1") +
scale_color_brewer(palette = "Set1") +
theme(legend.position = "none",
strip.text = element_text(face = "bold"),
strip.text.x = element_text(size = 10, margin = margin(.1, 0, .1, 0, "cm")),
strip.text.y = element_text(size = 8, margin = margin(0, .1, 0, .1, "cm")),
strip.background = element_rect(colour = "black", fill = "grey95",
size = 0.3, linetype = 1)) +
theme(axis.text = element_text(size = 9),
axis.text.y = element_text(size = 8),
axis.title = element_text(size = 10)) +
ylab(expression("Normalised expression")) +
xlab("Capture time") +
geom_point(data = df_g, aes(y = median_expression),
shape = 21, color = 'black', size = 2) +
geom_line(data = df_g, aes(y = median_expression, group = 1))
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
plot_over_capture_time <- last_plot()
# ggsave("../../figs/supplementary_time_shalek.png", width = 6, height = 4)
We’ll use limma-voom for standard differential expression:
dge <- DGEList(counts(sce_hvg))
dge <- calcNormFactors(dge)
design <- model.matrix(~ x, colData(sce_hvg))
v <- voom(dge, design, plot = TRUE)
fit <- lmFit(v, design)
fit <- eBayes(fit)
results <- decideTests(fit)
df_limma <- data_frame(coef = fit$coefficients[,2],
pval = fit$p.value[,2],
mu = pcavi$m_mu,
gene_symbol = rowData(sce_hvg)$Geneid) %>%
left_join(df_beta, by = "gene_symbol") %>%
dplyr::mutate(qval = p.adjust(pval, method = "BH"),
log10qval = -log10(qval))
cols <- RColorBrewer::brewer.pal(3, "Set2")
cols2 <- c("#c5e2d9", cols[2])
ggplot(df_limma, aes(x = beta, y = log10qval, color = is_sig_graph)) +
geom_point() +
ylab(expression(paste("Limma Voom -", log[10], "(q-value)"))) +
xlab(expression(paste("PhenoPath ", beta))) +
geom_hline(yintercept = -log10(0.05), linetype = 2, alpha = 0.5) +
theme(axis.text = element_text(size = 8),
axis.title = element_text(size = 9),
axis.title.y = element_text(size = 8)) +
theme(legend.title = element_text(size = 9),
legend.text = element_text(size = 8)) +
scale_color_manual(values = cols2, name = "Interaction") +
theme(legend.margin = margin(), legend.position = "none")
de_plot <- ggExtra::ggMarginal(last_plot(), margins = "y", type = "histogram", size = 10)
Again no obvious relationship.
df_beta <- mutate(df_beta,
crossover = - alpha / beta)
filter(df_beta, is_sig) %>%
ggplot(aes(x = crossover)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# write_csv(df_beta, "../tables/Phenopath/shalek_interactions.csv")
textinfo <- frame_data(
~x, ~y, ~label,
1.2, 0.2, "Gene upregulated along pseudotime\nLPS increases upregulation",
1.2, -0.2, "Gene upregulated along pseudotime\nPAM increases upregulation",
-1.3, 0.2, "Gene downregulated along pseudotime\nPAM increases downregulation",
-1.3, -0.2, "Gene downregulated along pseudotime\nLPS increases downregulation"
)
cols <- RColorBrewer::brewer.pal(3, "Set2")
cols2 <- c("#c5e2d9", cols[2])
outline_cols = c("#c5e2d9", 'black')
ggplot(df_beta, aes(x = lambda, y = beta)) +
geom_point(shape = 21, aes(fill = is_sig_graph, color = is_sig_graph)) +
geom_vline(xintercept = 0, linetype = 2, alpha = 0.5) +
geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) +
scale_fill_manual(values = cols2, name = "Interaction") +
scale_color_manual(values = outline_cols, name = "Interaction") +
geom_text_repel(data = dplyr::filter(df_beta, (beta > 1 & is_sig) | beta < -1),
aes(label = gene), color = 'black',
size = 3) +
ylab("Covariate-pseudotime interaction") +
xlab("Gene regulation over pseudotime") +
theme(legend.position = 'bottom', #, legend.margin = margin(),
axis.text = element_text(size = 10),
axis.title = element_text(size = 11),
legend.title = element_text(size = 11),
legend.text = element_text(size = 10))
# geom_text(data = textinfo, aes(x = x, y = y, label = label),
# color = 'black', size = 3, fontface = "bold")
lplot <- ggExtra::ggMarginal(last_plot(), margins = "y", type = "histogram", size = 10)
#
# ggsave("../manuscript_plots/")
print(lplot)
genome <- "hg19"
# supportedGeneIDs()
id <- "geneSymbol"
# source("https://bioconductor.org/biocLite.R")
# biocLite("org.Hs.eg.db")
all_genes <- rowData(sce_hvg)$Geneid
upreg_genes <- filter(df_beta, is_sig, beta > 0) %>% extract2("gene_symbol")
downreg_genes <- filter(df_beta, is_sig, beta < 0) %>% extract2("gene_symbol")
upg <- 1 * (all_genes %in% upreg_genes)
downg <- 1 * (all_genes %in% downreg_genes)
names(upg) <- names(downg) <- all_genes
pwfup <- nullp(upg, genome = "hg19", id = "geneSymbol")
## Loading hg19 length data...
goup <- goseq(pwfup, genome = "hg19", id = "geneSymbol", test.cats = "GO:BP")
## Fetching GO annotations...
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
## The following object is masked _by_ '.GlobalEnv':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
##
## For 479 genes, we could not find any categories. These genes will be excluded.
## To force their use, please run with use_genes_without_cat=TRUE (see documentation).
## This was the default behavior for version 1.15.1 and earlier.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
pwfdown <- nullp(downg, genome = "hg19", id = "geneSymbol")
## Loading hg19 length data...
## Warning in pcls(G): initial point very close to some inequality constraints
godown <- goseq(pwfdown, genome = "hg19", id = "geneSymbol", test.cats = "GO:BP")
## Fetching GO annotations...
## For 479 genes, we could not find any categories. These genes will be excluded.
## To force their use, please run with use_genes_without_cat=TRUE (see documentation).
## This was the default behavior for version 1.15.1 and earlier.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
Graph results:
parse_go <- function(go, type, n_tested) {
go <- go %>%
mutate(qval = p.adjust(over_represented_pvalue)) %>%
mutate(log10qval = -log10(qval),
prop_in_cat = numInCat / n_tested) %>%
head(n = 6) %>%
mutate(type = type) %>%
tbl_df() %>%
arrange(desc(log10qval))
go
}
gos <- bind_rows(
parse_go(goup, "Up-regulated", length(upreg_genes)),
parse_go(godown, "Down-regulated", length(downreg_genes))
)
# gos <- filter(gos, type == "LPS up-regulated")
gos$term <- stringr::str_to_title(gos$term)
gos$term <- factor(gos$term, levels = gos$term[order(gos$log10qval)])
gos <- mutate(gos, prop_cat = numDEInCat / numInCat)
gos %>%
filter(qval < 0.05) %>%
ggplot(aes(x = term, y = log10qval)) + #, size = 100 * prop_cat)) +
geom_segment(aes(y = min(log10qval - 1), yend = log10qval, x = term, xend = term),
color = 'grey30', linetype = 3) +
geom_point(aes(color = type, shape = type)) +
coord_flip() +
facet_wrap(~ type, ncol = 1, scales = "free") +
theme(axis.title.y = element_blank()) +
ylab(expression(paste("-", log[10], " q-value"))) +
scale_size(name = "% category\nrepresented") +
theme(legend.position = "bottom", #c(0.6,0.1),
legend.direction = "horizontal",
axis.text = element_text(size = 8),
axis.text.y = element_text(size = 8),
axis.title = element_text(size = 10),
legend.title = element_blank(),
legend.text = element_text(size = 10),
legend.margin = margin(t = -.3, l = -4.5, unit = "cm")) +
scale_color_brewer(palette = "Set1") +
theme(strip.text = element_text(size = 8, face = "bold"),
strip.background = element_rect(colour = "black", fill = "grey95",
size = 0.5, linetype = 1))
# ggsave("../../figs/go.png", width = 5, height = 5)
# goplot <- last_plot()
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid splines stats4 parallel stats graphics grDevices
## [8] utils datasets methods base
##
## other attached packages:
## [1] org.Hs.eg.db_3.6.0 AnnotationDbi_1.42.1
## [3] bindrcpp_0.2.2 ggbeeswarm_0.6.0
## [5] phenopath_1.4.0 goseq_1.32.0
## [7] geneLenDataBase_1.16.0 BiasedUrn_1.07
## [9] RColorBrewer_1.1-2 edgeR_3.22.5
## [11] limma_3.36.5 DT_0.4
## [13] viridis_0.5.1 viridisLite_0.3.0
## [15] cowplot_0.9.3 ggrepel_0.8.0
## [17] magrittr_1.5 forcats_0.3.0
## [19] stringr_1.3.1 dplyr_0.7.6
## [21] purrr_0.2.5 readr_1.1.1
## [23] tidyr_0.8.1 tibble_1.4.2
## [25] tidyverse_1.2.1 scater_1.8.4
## [27] SingleCellExperiment_1.2.0 SummarizedExperiment_1.10.1
## [29] DelayedArray_0.6.6 BiocParallel_1.14.2
## [31] matrixStats_0.54.0 GenomicRanges_1.32.7
## [33] GenomeInfoDb_1.16.0 IRanges_2.14.12
## [35] S4Vectors_0.18.3 ggplot2_3.0.0
## [37] Biobase_2.40.0 BiocGenerics_0.26.0
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.3-2 rjson_0.2.20
## [3] rprojroot_1.3-2 XVector_0.20.0
## [5] rstudioapi_0.8 bit64_0.9-7
## [7] lubridate_1.7.4 xml2_1.2.0
## [9] tximport_1.8.0 knitr_1.20
## [11] jsonlite_1.5 Rsamtools_1.32.3
## [13] broom_0.5.0 GO.db_3.6.0
## [15] shinydashboard_0.7.0 shiny_1.1.0
## [17] compiler_3.5.1 httr_1.3.1
## [19] backports_1.1.2 assertthat_0.2.0
## [21] Matrix_1.2-14 lazyeval_0.2.1
## [23] cli_1.0.1 later_0.7.5
## [25] prettyunits_1.0.2 htmltools_0.3.6
## [27] tools_3.5.1 gtable_0.2.0
## [29] glue_1.3.0 GenomeInfoDbData_1.1.0
## [31] reshape2_1.4.3 Rcpp_0.12.19
## [33] cellranger_1.1.0 Biostrings_2.48.0
## [35] nlme_3.1-137 rtracklayer_1.40.6
## [37] crosstalk_1.0.0 DelayedMatrixStats_1.2.0
## [39] rvest_0.3.2 miniUI_0.1.1.1
## [41] mime_0.6 XML_3.98-1.16
## [43] zlibbioc_1.26.0 scales_1.0.0
## [45] hms_0.4.2 promises_1.0.1
## [47] rhdf5_2.24.0 yaml_2.2.0
## [49] memoise_1.1.0 gridExtra_2.3
## [51] biomaRt_2.36.1 ggExtra_0.8
## [53] stringi_1.2.4 RSQLite_2.1.1
## [55] GenomicFeatures_1.32.3 rlang_0.2.2
## [57] pkgconfig_2.0.2 bitops_1.0-6
## [59] evaluate_0.12 lattice_0.20-35
## [61] Rhdf5lib_1.2.1 bindr_0.1.1
## [63] labeling_0.3 GenomicAlignments_1.16.0
## [65] htmlwidgets_1.3 bit_1.1-14
## [67] tidyselect_0.2.5 plyr_1.8.4
## [69] R6_2.3.0 DBI_1.0.0
## [71] mgcv_1.8-24 pillar_1.3.0
## [73] haven_1.1.2 withr_2.1.2
## [75] RCurl_1.95-4.11 modelr_0.1.2
## [77] crayon_1.3.4 rmarkdown_1.10
## [79] progress_1.2.0 locfit_1.5-9.1
## [81] readxl_1.1.0 data.table_1.11.8
## [83] blob_1.1.1 digest_0.6.18
## [85] xtable_1.8-3 httpuv_1.4.5
## [87] munsell_0.5.0 beeswarm_0.2.3
## [89] vipor_0.4.5